Wildfire PM by day for different zips codes

First we can look at the concentrations in each zip code and day.

Cases by day for different zips codes

Next we can look at the cases in each zip code and day.

Defining the Wildfire Period

Next we’ll classify each day-zip combination as experiencing some wildfire exposure or not. Wildfire period is defined as any day where the zip code has a PM2.5 value greater than zero (note: some are very low). See how the WF period(s) is/are more clearly the second plot which uses a threshold of 1, instead of 0.

Calculating RR

Now we can begin to calculate RR, first for the entire zip codes (not race stratified). We’ll use a constant of o.1 to fill in spots where there are no cases during the exposure or control periods.

constant <- 0.1

rates_overall <- foo %>%
  mutate(total = white + hispanic + black + native_am + asian_pi + other + missing) %>%
  group_by(zip, wildfire) %>% 
  summarise(cases = sum(total),
            days = length(svc_from_dt)) %>%
    mutate(casesPerDay =  cases/days)

RR_overall <- foo %>%
  mutate(total = white + hispanic + black + native_am + asian_pi + other + missing) %>%
  group_by(zip, wildfire) %>% 
  summarise(cases = sum(total),
            days = length(svc_from_dt)) %>%
    mutate(cases = ifelse(cases == 0, constant, cases),
      casesPerDay = cases/days) %>%
    select(zip, wildfire, casesPerDay) %>%
    spread(key = wildfire, value = casesPerDay) %>%
    mutate(RateRatio = WFperiod/nonWFperiod,
           ln_RateRatio = log(RateRatio)) %>%
  rename(WF_rate = WFperiod, 
         nonWF_rate = nonWFperiod)

DT::datatable({rates_overall}) %>% DT::formatRound(5,5) 
DT::datatable({RR_overall}) %>% DT::formatRound(c(2:5),3)

Plot Overall rate ratios

## Calculating race-specific RR

Now we can begin to calculate race-specific RR.

cases <- foo %>%
  group_by(zip, wildfire) %>% 
  summarise(white = sum(white),
            hispanic = sum(hispanic),
            black = sum(black),
            native_am = sum(native_am),
            asian_pi = sum(asian_pi),
            other = sum(other),
            days = length(svc_from_dt)) %>%
  gather(white, hispanic, black, native_am, asian_pi,other, key = race, value = cases) %>%
  select(-days) %>%
   spread(key = wildfire, value = cases) %>%
  mutate(remove = ifelse(WFperiod == 0 & nonWFperiod == 0, "remove","keep")) %>%
  rename(WF_cases = WFperiod, 
         noWF_cases = nonWFperiod)


rates <- foo %>%
  group_by(zip, wildfire) %>% 
  summarise(white = sum(white),
            hispanic = sum(hispanic),
            black = sum(black),
            native_am = sum(native_am),
            asian_pi = sum(asian_pi),
            other = sum(other),
            days = length(svc_from_dt)) %>%
  gather(white, hispanic, black, native_am, asian_pi,other, key = race, value = cases) %>%
  mutate(casesPerDay = ifelse(cases == 0, constant/days, cases/days)) %>% 
  select(-days, -cases) %>%
  spread(key = wildfire, value = casesPerDay) %>%
  rename(WF_rate = WFperiod, 
         noWF_rate = nonWFperiod)

RR_race <- merge(cases, rates) %>%
  mutate(RateRatio = WF_rate/noWF_rate, 
         ln_RateRatio = log(WF_rate/noWF_rate))

DT::datatable({RR_race}) %>% DT::formatRound(c(3:6),3)

Look at the distrubutions of race_specific rate ratios, need to look into why so many of these are focused around 1.5…?

Here’s the same data in a BoxPlot

RR_race %>%
    ggplot() + geom_boxplot(aes(x= race, y= ln_RateRatio))+ geom_jitter(aes(x= race, y= ln_RateRatio, color=race), alpha=0.2) + guides(color=FALSE)

and here it is after removing the zipcodes where no cases exisited in either (WF/nonWFperiod)

There is a big difference in the RRs after removing the data.

RR_race %>%
  filter(remove == "keep") %>%
    ggplot() + geom_boxplot(aes(x= race, y= ln_RateRatio))+ geom_jitter(aes(x= race, y= ln_RateRatio, color=race), alpha=0.2) + guides(color=FALSE)

Examine the Sensitivity to Parameters

Two parameters:

  • the threshold for inclusion in the wildfire period
  • the constant used to fill in missing cases

need to be examined to show how they may change the results.

rm(booHoo)

constants <- c(0.01, 0.05,0.1,0.2,0.3,0.4, 0.5)
thresholds <- c(0:45)


for(constant in constants){
  for(threshold in thresholds){
  
   boo <- read.dta("~/Wildfires_kurt/race eth of asthma ED visits by zip day_exp merged.dta") %>% 
      mutate(wildfire = ifelse(pm_25 > threshold, "WFperiod", "nonWFperiod"),
             total = white + hispanic + black + native_am + asian_pi + other + missing) %>%
      rename(zip = bene_addr_zip) %>%
      group_by(zip, wildfire) %>% 
      summarise(cases = sum(total),
                days = length(svc_from_dt)) %>%
      mutate(cases = ifelse(cases == 0, constant, cases),
             casesPerDay = cases/days) %>%
      select(zip, wildfire, casesPerDay) %>%
      spread(key = wildfire, value = casesPerDay) %>%
      mutate(RateRatio = WFperiod/nonWFperiod,
             ln_RateRatio = log(WFperiod/nonWFperiod),
             const = constant,
             thresh = threshold)
   
  
   
   
   if(exists("booHoo")) {
     booHoo <- bind_rows(booHoo, boo)
   }
   else {
     booHoo <- boo
   }

  }
  }
    
booHoo %>% 
  mutate(county = "San Diego") %>% 
   ggplot() + geom_boxplot(aes(x=county, y= ln_RateRatio))+ geom_jitter(aes(x=county, y= ln_RateRatio), alpha=0.2) + guides(color=FALSE) + facet_grid(const~thresh)

booHoo %>% 
  group_by(const, thresh)%>%
  summarise(mean_LnRR = mean(ln_RateRatio, na.rm=T),
            var_LnRR = var(ln_RateRatio, na.rm=T),
            min_LnRR = min(ln_RateRatio, na.rm=T),
            max_LnRR = max(ln_RateRatio, na.rm=T)) %>%
  gather(min_LnRR,mean_LnRR,max_LnRR,var_LnRR, key = statistic, value = value) %>%
  filter(statistic == "mean_LnRR") %>%
  ggplot() + geom_tile(aes(x=factor(thresh), y=factor(const), fill= value)) + 
  facet_grid(statistic~.) + 
  scale_fill_gradient(low = "red", high = "green") + 
  ylab("value of the constant usd to replace no cases") + 
  xlab("value above which we define the wildfire period")

booHoo %>% 
  ggplot() + 
  geom_smooth(aes(x=thresh, y= ln_RateRatio, group=const, color=factor(const)), method = "auto") 

Merge, Weight, and Model

Combine with other covariates, calculate the weights and